library(dplyr)
# For Model fitting
library(lme4)
library(nlme)
library(purrr)
# For diagnostics
library(performance)
# For adding new columns
library(tibble)
library(MuMIn)
# Load data
sys.source("./scripts/code_join_data_full_dataset.R", envir = knitr::knit_global())
# Load functions
## Models
sys.source("./R/functions_models.R", envir = knitr::knit_global())
sys.source("./R/function_nlme_validation_plots.R", envir = knitr::knit_global())

Model Fitting

Models: Questions 1,2 and Mass fractions

Select performance and traits variables

data_for_models <-
    data_for_models %>%
        # Select variables for analysis
        dplyr::select(c(1:7,9,11:ncol(data_for_models)))

\[response\sim treatment*fixer\ + initial\ height + random( 1|\ specie)\]

# Take response variables names 
response_vars_q1_q2 <- 
    set_names(names(data_for_models)[c(5:11, 12:15,17)])

response_vars_q1_q2 
  total_biomass   above_biomass   below_biomass             rgr             rmf 
"total_biomass" "above_biomass" "below_biomass"           "rgr"           "rmf" 
            smf             lmf            amax              gs             wue 
          "smf"           "lmf"          "amax"            "gs"           "wue" 
           d13c            pnue 
         "d13c"          "pnue" 
models_q1_q2_mass_fractions <- map(response_vars_q1_q2, ~ mixed_model_1(response = .x, 
                                                              data = data_for_models))
# Log models

model_q2_wue_log <- lmer(log(wue) ~ nfixer*treatment + init_height + (1|spcode), 
                         data = data_for_models)

model_q2_pnue_log <- lmer(log(pnue) ~ nfixer*treatment + init_height + (1|spcode), 
                         data = data_for_models) 


model_rmf_log <- lmer(log(rmf) ~ nfixer*treatment + init_height + (1|spcode), 
                         data = data_for_models)

model_smf_log <- lmer(log(smf) ~ nfixer*treatment + init_height + (1|spcode), 
                         data = data_for_models) 

model_lmf_log <- lmer(log(lmf) ~ nfixer*treatment + init_height + (1|spcode), 
                         data = data_for_models) 
# Append log models to model list Q1 Q2
log_models <- list(model_q2_wue_log, model_q2_pnue_log,
                   model_rmf_log, model_smf_log, model_lmf_log)


names(log_models) <- c("wue_log", "pnue_log", 
                       "rmf_log", "smf_log", "lmf_log")

models_q1_q2_mass_fractions <- append(log_models, models_q1_q2_mass_fractions)

length(models_q1_q2_mass_fractions)
[1] 17
names(models_q1_q2_mass_fractions)
 [1] "wue_log"       "pnue_log"      "rmf_log"       "smf_log"      
 [5] "lmf_log"       "total_biomass" "above_biomass" "below_biomass"
 [9] "rgr"           "rmf"           "smf"           "lmf"          
[13] "amax"          "gs"            "wue"           "d13c"         
[17] "pnue"         

Model: Nodule count

# Load data
# This step was done like this because I am working with a subset of the data
# source cleaned data
source("./scripts/code_clean_data_nodules.R")

# Delete unused variables
data_nodules_cleaned <-
    data_nodules_cleaned %>%
        
        # add id to rownames for keep track of the rows
        column_to_rownames("id") %>% 
        dplyr::select(spcode,treatment, everything())

lmer gaussian

lmer_gaussian <- lmer(number_of_root_nodulation ~ treatment + init_height + 
                           (1 |spcode),data = data_nodules_cleaned)

lmer gaussian log

lmer_gaussian_log <-  lmer(log(number_of_root_nodulation) ~ treatment + init_height + 
                           (1 |spcode),data = data_nodules_cleaned)

Compare models

# Based on this output I decided to use the log model
MuMIn::model.sel(lmer_gaussian, lmer_gaussian_log)
Model selection table 
                   (Int) int_hgh trt df   logLik  AICc  delta weight
lmer_gaussian_log  2.943 0.05571   +  7  -35.282  87.4   0.00      1
lmer_gaussian     38.350 1.67500   +  7 -195.570 407.9 320.58      0
Models ranked by AICc(x) 
Random terms (all models): 
  1 | spcode
AIC(lmer_gaussian, lmer_gaussian_log)
                  df       AIC
lmer_gaussian      7 405.13948
lmer_gaussian_log  7  84.56385

Improving log model

## After seeing the validation plots for the lmer_gaussian_log I decided to try 
## to improve the log model using the nlme package


nlme_gaussian_log <- lme(log(number_of_root_nodulation) ~ treatment + init_height,
                                    random = ~1|spcode,
                                    data = data_nodules_cleaned)



nlme_nodule_log_weights <- lme(log(number_of_root_nodulation) ~ treatment + init_height,
                                    random = ~1|spcode,
                                    weights = varIdent(form = ~1|spcode),
                                    data = data_nodules_cleaned)

Compare models

# Check if the nlme_gaussian_log_weights is a better model

# Compare between nlme objects
anova(nlme_gaussian_log, nlme_nodule_log_weights )
                        Model df      AIC      BIC    logLik   Test  L.Ratio
nlme_gaussian_log           1  7 84.56385 96.89225 -35.28193                
nlme_nodule_log_weights     2  9 62.79085 78.64165 -22.39542 1 vs 2 25.77301
                        p-value
nlme_gaussian_log              
nlme_nodule_log_weights  <.0001
model.sel(nlme_gaussian_log, lmer_gaussian_log,  nlme_nodule_log_weights)
Model selection table 
                        (Int) int_hgh trt   class  weights df  logLik AICc
nlme_nodule_log_weights 3.344 0.03767   +     lme vrI(spc)  9 -22.395 67.5
lmer_gaussian_log       2.943 0.05571   + lmerMod           7 -35.282 87.4
nlme_gaussian_log       2.943 0.05571   +     lme           7 -35.282 87.4
                        delta weight
nlme_nodule_log_weights  0.00      1
lmer_gaussian_log       19.84      0
nlme_gaussian_log       19.84      0
Abbreviations:
 weights: vrI(spc) = 'varIdent(~1|spcode)'
Models ranked by AICc(x) 
Random terms (all models): 
  1 | spcode
AIC(lmer_gaussian_log, nlme_gaussian_log, nlme_nodule_log_weights)
                        df      AIC
lmer_gaussian_log        7 84.56385
nlme_gaussian_log        7 84.56385
nlme_nodule_log_weights  9 62.79085
# Model improved, check assumptions for nlme_gaussian_log_weights 

Models: Question 3

\[performance\sim treatment:fixer:scaled(trait)\ + initial\ height + random( 1|\ specie)\]

Scale preditors

data_for_models_scaled <-
    data_for_models %>% 
        mutate(across(c(4,9:14),scale)) 

Model Aboveground biomass

lme_above_biom_3way <- lme(above_biomass ~  treatment:nfixer:amax +
                                            treatment:nfixer:gs  +
                                            treatment:nfixer:wue +
                                            treatment:nfixer:pnue +
                                            treatment:nfixer:d13c +
                                            init_height ,
                    random = ~1 | spcode, 
                    data = data_for_models_scaled)

Model Belowground biommass

lme_below_biom_3way <- lme(below_biomass ~  treatment:nfixer:amax +
                                            treatment:nfixer:gs  +
                                            treatment:nfixer:wue +
                                            treatment:nfixer:pnue +
                                            treatment:nfixer:d13c +
                                            init_height ,
                    random = ~1 | spcode, 
                    data = data_for_models_scaled)

Model RGR

lme_rgr_3way <- lme(rgr ~   treatment:nfixer:amax +
                            treatment:nfixer:gs  +
                            treatment:nfixer:wue +
                            treatment:nfixer:pnue +
                            treatment:nfixer:d13c +
                            init_height ,
                  random = ~1 | spcode, 
                  data = data_for_models_scaled)

Model Total Biomass

lme_total_biom_3way <- lme(total_biomass ~  treatment:nfixer:amax +
                                            treatment:nfixer:gs  +
                                            treatment:nfixer:wue +
                                            treatment:nfixer:pnue +
                                            treatment:nfixer:d13c +
                                            init_height,
                            random = ~1 | spcode, 
                            data = data_for_models_scaled)

Model Validation

Validation of models for questions 1,2 and Mass fractions

Collinearity

map(models_q1_q2_mass_fractions, check_collinearity)
$wue_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
           nfixer 1.30         1.14      0.77
        treatment 3.85         1.96      0.26
      init_height 1.05         1.02      0.96
 nfixer:treatment 4.57         2.14      0.22

$pnue_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
           nfixer 1.20         1.09      0.84
        treatment 3.85         1.96      0.26
      init_height 1.05         1.03      0.95
 nfixer:treatment 4.31         2.08      0.23

$rmf_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
           nfixer 1.04         1.02      0.96
        treatment 3.87         1.97      0.26
      init_height 1.06         1.03      0.94
 nfixer:treatment 3.94         1.98      0.25

$smf_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
           nfixer 1.02         1.01      0.98
        treatment 3.88         1.97      0.26
      init_height 1.07         1.03      0.94
 nfixer:treatment 3.88         1.97      0.26

$lmf_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
           nfixer 1.05         1.02      0.95
        treatment 3.87         1.97      0.26
      init_height 1.06         1.03      0.94
 nfixer:treatment 3.95         1.99      0.25

$total_biomass
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.86         1.97      0.26
           nfixer 1.12         1.06      0.90
      init_height 1.06         1.03      0.95
 treatment:nfixer 4.12         2.03      0.24

$above_biomass
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.87         1.97      0.26
           nfixer 1.06         1.03      0.94
      init_height 1.06         1.03      0.94
 treatment:nfixer 3.98         2.00      0.25

$below_biomass
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.86         1.96      0.26
           nfixer 1.16         1.08      0.86
      init_height 1.05         1.03      0.95
 treatment:nfixer 4.22         2.06      0.24

$rgr
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.87         1.97      0.26
           nfixer 1.05         1.02      0.95
      init_height 1.06         1.03      0.94
 treatment:nfixer 3.96         1.99      0.25

$rmf
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.87         1.97      0.26
           nfixer 1.03         1.02      0.97
      init_height 1.06         1.03      0.94
 treatment:nfixer 3.92         1.98      0.25

$smf
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.87         1.97      0.26
           nfixer 1.06         1.03      0.94
      init_height 1.06         1.03      0.94
 treatment:nfixer 3.99         2.00      0.25

$lmf
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.87         1.97      0.26
           nfixer 1.04         1.02      0.96
      init_height 1.06         1.03      0.94
 treatment:nfixer 3.94         1.98      0.25

$amax
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.88         1.97      0.26
           nfixer 1.03         1.01      0.97
      init_height 1.07         1.03      0.94
 treatment:nfixer 3.90         1.98      0.26

$gs
# Check for Multicollinearity

Low Correlation

        Term  VIF Increased SE Tolerance
   treatment 3.83         1.96      0.26
      nfixer 1.92         1.39      0.52
 init_height 1.03         1.01      0.97

Moderate Correlation

             Term  VIF Increased SE Tolerance
 treatment:nfixer 6.08         2.47      0.16

$wue
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.85         1.96      0.26
           nfixer 1.19         1.09      0.84
      init_height 1.05         1.03      0.95
 treatment:nfixer 4.29         2.07      0.23

$d13c
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.85         1.96      0.26
           nfixer 1.20         1.09      0.84
      init_height 1.05         1.03      0.95
 treatment:nfixer 4.31         2.07      0.23

$pnue
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.85         1.96      0.26
           nfixer 1.30         1.14      0.77
      init_height 1.05         1.02      0.96
 treatment:nfixer 4.55         2.13      0.22
# No problems found here
map(models_q1_q2_mass_fractions, check_model)
$wue_log


$pnue_log


$rmf_log


$smf_log


$lmf_log


$total_biomass


$above_biomass


$below_biomass


$rgr


$rmf


$smf


$lmf


$amax


$gs


$wue


$d13c


$pnue

Validation of nodule count models

Checking log nodule count model’s assumptions

check_model(lmer_gaussian_log)

nlme_validation_plots(lmer_gaussian_log, data = data_nodules_cleaned, 
                      group = "spcode", variables = c("treatment"))

Checking improved nlme log model assumptions

Zuur et al pp 84:

“Note that these residuals still show heterogeneity, but this is now allowed (because the residual variation differs depending on the chosen variance structure and values of the variance covariate). Hence, these residuals are less useful for the model validation process.”

nlme_validation_plots(nlme_nodule_log_weights, data = data_nodules_cleaned,
                      group = "spcode", variables = c("treatment") )

Validation of models for question 3

Aboveground biomass

nlme_validation_plots(lme_above_biom_3way, data = data_for_models_scaled,
                      group = "spcode")

[1] "No variable specified inthe variables argument"

Belowground biomass

nlme_validation_plots(lme_below_biom_3way, data = data_for_models_scaled,
                      group = "spcode")

[1] "No variable specified inthe variables argument"

RGR biomass

nlme_validation_plots(lme_rgr_3way, data = data_for_models_scaled, group = "spcode")

[1] "No variable specified inthe variables argument"

Total biomass

nlme_validation_plots(lme_total_biom_3way, data = data_for_models_scaled,
                      group = "spcode")

[1] "No variable specified inthe variables argument"

Save model lists as .RData

Models for Q1, Q2 and Mass fractions

models_q1_q2_mass_fractions <-
    
    models_q1_q2_mass_fractions %>%
  
    # Remove models that showed violation of the assumptions
    purrr::list_modify("pnue" = NULL, "wue" = NULL, 
                       "rmf" = NULL, "smf" = NULL, "lmf" = NULL)

Nodule count

models_nodule_count <- list(nlme_gaussian_log, nlme_nodule_log_weights)

names(models_nodule_count) <- c("nlme_gaussian_log", "nlme_gaussian_log_weights")

3-way models

models_3way_q3<- list(lme_above_biom_3way, lme_below_biom_3way,
                           lme_rgr_3way, lme_total_biom_3way)

names(models_3way_q3) <- c("lme_above_biom_3way", "lme_below_biom_3way",
                               "lme_rgr_3way", "lme_total_biom_3way")
saveRDS(models_q1_q2_mass_fractions, file = "./processed_data/models_q1_q2_and_mass_fractions.RData") 
saveRDS(models_nodule_count, file = "./processed_data/models_nodule_count.RData") 
saveRDS(models_3way_q3, file = "./processed_data/models_3way_q3.RData")